## We acknowledge the following resources that were useful for writing the simulation code in this file:
##
## - maximum likelihood from the Optim.jl documentation
## - automatic differentiation, etc., from quantecon: https://julia.quantecon.org/more_julia/optimization_solver_packages.html

using Distributions, Optim, Plots, ProgressMeter, Weave
using CSV, Tables, StatsBase, Distributions

# N = 260601 # is full size!
N = 260601
N = 500

@show N

#Please change this path into your personal folder where the data files were placed according to the readme file explanations.
application_data = CSV.read("folder/data/sample.csv", Tables.matrix)
application_data = application_data[1:N,:]

draw_bootstrap_sample = function(df)
    n, K = size(df)
    s_index = sample(1:n,n)
    return df[s_index,:] 
end

SS = 100
Kd = 5 # 5 discrete regressors

sigma_vals = [0.01 0.05 0.1 .25 0.5 1. 2. 5. 10.]
n_sigma = length(sigma_vals)

# b1 is the vector of coefficient estimates on the discrete regressors
b1 = zeros(SS,Kd)
b1_cs = zeros(SS,Kd)
b1_hk = zeros(SS,Kd)
b1_mrv = zeros(SS,Kd)

# b2 is the 
b2 = zeros(SS)
b2_cs = zeros(SS)
b2_hk = zeros(SS,n_sigma)
b2_mrv = zeros(SS,n_sigma)

# autoregressive coefficient
r_hk = zeros(SS,n_sigma)
r_mrv = zeros(SS,n_sigma)

# thresholds
g2_mrv = zeros(SS,n_sigma)
g4_mrv = zeros(SS,n_sigma)

J = 4 # (1) bad and very bad; (2) fair; (3) good; (4) very good
k = 3 # see equation (25) in the resubmission from July 2021
            
#     child  married unemp  retired other 
β1 = [-0.030 0.139   -0.188 -0.132  -0.370]
β2 = [0.049] # coef on log income
ρ = [0.733]    # autoreg on >=3

γ2 = [-3.275]
γ3 = [0.0]
γ4 = [3.326]

for s in 1:SS

    df_s = draw_bootstrap_sample(application_data)

    Y0 = df_s[:,2]
    D0 = ifelse.(Y0 .>= 3.,1,0)

    Xc_1 = df_s[:,3] #income, child, married, unemp, retired, other
    Xd_1 = df_s[:,[6;  9; 12; 15; 18]] #income, child, married, unemp, retired, other

    Xc_2 = df_s[:,4]
    Xd_2 = df_s[:,[7; 10; 13; 16; 19]]

    Xc_3 = df_s[:,5]
    Xd_3 = df_s[:,[8; 11; 14; 17; 20]]
    
    n = length(Y0)
    
    dA = Normal(0,1)
    A = rand(dA,n)

    dU = Logistic(0,1)
    U1 = rand(dU,n)
    U2 = rand(dU,n)
    U3 = rand(dU,n)

    Y1_star = Xd_1*β1' + Xc_1.*β2 + U1 + A + D0.*ρ
    Y1 = ones(n)
    Y1 = Y1 .+ ifelse.(Y1_star.> γ2,1,0)
    Y1 = Y1 .+ ifelse.(Y1_star.> γ3,1,0)
    Y1 = Y1 .+ ifelse.(Y1_star.> γ4,1,0)
    D1 = ifelse.(Y1_star.> γ3,1,0)

    Y2_star = Xd_2*β1' + Xc_1.*β2 + U2 + A + D1.*ρ
    Y2 = ones(n)
    Y2 = Y2 .+ ifelse.(Y2_star.> γ2,1,0)
    Y2 = Y2 .+ ifelse.(Y2_star.> γ3,1,0)
    Y2 = Y2 .+ ifelse.(Y2_star.> γ4,1,0)
    D2 = ifelse.(Y2_star.> γ3,1,0)

    Y3_star = Xd_3*β1' + Xc_1.*β2 + U3 + A + D2.*ρ
    Y3 = ones(n)
    Y3 = Y3 .+ ifelse.(Y3_star.> γ2,1,0)
    Y3 = Y3 .+ ifelse.(Y3_star.> γ3,1,0)
    Y3 = Y3 .+ ifelse.(Y3_star.> γ4,1,0)
    D3 = ifelse.(Y3_star.> γ3,1,0)

    ## Glue the discrete and continous regressors
    X_1 = [Xd_1 Xc_1]
    X_2 = [Xd_2 Xc_2]
    X_3 = [Xd_3 Xc_3]

    ## Effective change in X (middle period)
    DX = X_2 - X_1
    # definition of switcher for binarized HK / binarized FEOL
    S = ifelse.(D1 .+ D2 .== 1,1,0)

    ## Binary indicator that is 1 if discrete Xs do not change
    ##  from period 2 to 3.
    fix_Xd = minimum(Xd_2 .== Xd_3, dims = 2)

    DX_f = DX[fix_Xd[:,1],:]
    Xc_3_f = Xc_3[fix_Xd[:,1]]
    Xc_2_f = Xc_2[fix_Xd[:,1]]
    D1_f = D1[fix_Xd[:,1]]
    D2_f = D2[fix_Xd[:,1]]
    D3_f = D3[fix_Xd[:,1]]
    D0_f = D0[fix_Xd[:,1]]
    S_f = S[fix_Xd[:,1]]
    Y2_f = Y2[fix_Xd[:,1]]
    Y3_f = Y3[fix_Xd[:,1]]

    ## Define the cross-sectional log likelihood function, static model, use only period 2
    function ll_cs(b)
        index = [Xd_2 Xc_2] * [b[1:Kd];b[Kd+1]]
        return -sum(D2 .* log.(cdf(dU,index)) + (1 .- D2) .* log.(1 .-cdf(dU,index)) )
    end

    ## Define the log-likelihood for the static model, using periods 1 and 2
    function ll(b)
        index = DX * [b[1:Kd];b[Kd+1]]
        return -sum(S .* (D2 .* log.(cdf(dU,index)) + (1 .- D2) .* log.(1 .-cdf(dU,index))) ) 
    end

    function ll_hk(br, sigma)
        b1 = br[1:Kd]
        b2 = br[Kd+1]
        r = br[Kd+2]

        index = DX_f * [b1;b2] .+ r.*(D3_f .- D0_f)
        gaus_kernel = Normal(0,1)
        Xc_weight = pdf.(gaus_kernel,(Xc_3_f - Xc_2_f)./sigma)
        return -sum(Xc_weight .* S_f .* (D2_f .* log.(cdf(dU,index)) + (1 .- D2_f) .* log.(1 .-cdf(dU,index))))
    end

    function ll_mrv(brc, sigma)

        bb1 = brc[1:Kd]
        bb2 = brc[Kd+1]
        r = brc[Kd+2]
        c = [0.0 brc[Kd+3] 0.0 brc[Kd+4]]  #first one is dummy, gamma2, gamm3, gamm4

        gaus_kernel = Normal(0,1)
        Xc_weight = pdf.(gaus_kernel,(Xc_3_f - Xc_2_f)./sigma)

        rval = 0.0
        for j in 2:3
            for l in 3:4

                # D0 is set
                # D1 is set

                # Compute D2 
                D2l = ifelse.(Y2_f .>= l,1,0)
                D2j = ifelse.(Y2_f .>= j,1,0)
                D2jl = ifelse.(D1_f .== 1, D2j, D2l)
                
                # Compute D3
                D3j = ifelse.(Y3_f .>= j,1,0)
                D3l = ifelse.(Y3_f .>= l,1,0)
                D3jl = ifelse.(D1_f .== 1, D3l, D3j)

                Smrv = ifelse.(D1_f .== D2jl,0,1)

                Zjl = hcat(-DX_f,D0_f .- D3jl, D3jl, 1.0 .- D3jl)
                thetajl = vcat(bb1,bb2,r,c[j],c[l])
                index = Zjl * thetajl

                rval = rval -sum(Xc_weight .* Smrv .* (D1_f .* log.(cdf(dU,index)) + (1 .- D1_f) .* log.(1 .-cdf(dU,index))))

            end
        end

        return rval

    end

    ## The static FEOL estimator
    func = TwiceDifferentiable(b -> ll(b), ones(6); autodiff=:forward)
    yay = optimize(func, [β1'; β2] )

    b1[s,:] = Optim.minimizer(yay)[1:Kd]
    b2[s] = Optim.minimizer(yay)[Kd+1]
    
    ## The cross-sectional estimator
    func_cs = TwiceDifferentiable(b -> ll_cs(b), ones(6); autodiff=:forward)
    yay_cs = optimize(func_cs, [β1';β2])
    b1_cs[s,:] = Optim.minimizer(yay_cs)[1:Kd]
    b2_cs[s] = Optim.minimizer(yay_cs)[Kd+1]
    
    for sig_index in 1:n_sigma

        func_hk = TwiceDifferentiable(br -> ll_hk(br, sigma_vals[sig_index]), ones(7); autodiff=:forward)
        yay_hk = optimize(func_hk, [β1';β2;ρ])
        b1_hk[s,:] = Optim.minimizer(yay_hk)[1:Kd]
        b2_hk[s,sig_index] = Optim.minimizer(yay_hk)[Kd+1]
        r_hk[s,sig_index] = Optim.minimizer(yay_hk)[Kd+2]
    
        func_mrv = TwiceDifferentiable(brc -> ll_mrv(brc, sigma_vals[sig_index]), ones(9); autodiff=:forward)
        yay_mrv = optimize(func_mrv, [β1';β2;ρ;γ2;γ4])
        b1_mrv[s,:] = Optim.minimizer(yay_mrv)[1:Kd]
        b2_mrv[s,sig_index] = Optim.minimizer(yay_mrv)[Kd+1]
        r_mrv[s,sig_index] = Optim.minimizer(yay_mrv)[Kd+2]
        g2_mrv[s,sig_index] = Optim.minimizer(yay_mrv)[Kd+3]
        g4_mrv[s,sig_index] = Optim.minimizer(yay_mrv)[Kd+4]
    end
end

@show N
@show SS
@show sigma_vals

@show β2
@show mean(b2)
@show mean(b2_cs)
@show mean(b2_hk, dims = 1)
@show mean(b2_mrv, dims = 1)

@show ρ
@show mean(r_hk, dims = 1)
@show mean(r_mrv, dims = 1)
@show std(r_hk, dims = 1)
@show std(r_mrv, dims = 1)

@show [γ2 γ4]
@show mean(g2_mrv, dims = 1)
@show mean(g4_mrv, dims = 1);
